Read data

Read the data from the kaggle website https://www.kaggle.com/karangadiya/fifa19.

(load("fifa19small.rda"))
## [1] "fifa19small"
rownames(fifa19small) <- fifa19small$Name

fifa19small["R. Lewandowski",]
##                          Name              Club Position Value.EUR Age
## R. Lewandowski R. Lewandowski FC Bayern München       ST   7.7e+07  29
##                Overall Special Preferred.Foot International.Reputation
## R. Lewandowski      90    2152          Right                        4
##                Weak.Foot Skill.Moves Crossing Finishing HeadingAccuracy
## R. Lewandowski         4           4       62        91              85
##                ShortPassing Volleys Dribbling Curve FKAccuracy LongPassing
## R. Lewandowski           83      89        85    77         86          65
##                BallControl Acceleration SprintSpeed Agility Reactions
## R. Lewandowski          89           77          78      78        90
##                Balance ShotPower Jumping Stamina Strength LongShots
## R. Lewandowski      78        88      84      78       84        84
##                Aggression Interceptions Positioning Vision Penalties
## R. Lewandowski         80            39          91     77        88
##                Composure Marking StandingTackle SlidingTackle GKDiving
## R. Lewandowski        86      34             42            19       15
##                GKHandling GKKicking GKPositioning GKReflexes
## R. Lewandowski          6        12             8         10

Feature engineering

Value is skewed. Will be much easier to model sqrt(Value).

fifa19small$SqrtValue <- sqrt(fifa19small$Value.EUR)

fifa19small <- fifa19small[,-c(1, 2, 3, 4, 6)]

Exploration

Value is skewed. Will be much easier to model sqrt(Value).

library("ggplot2")
library("DALEX")
## Welcome to DALEX (version: 0.4.9).
## Find examples and detailed introduction at: https://pbiecek.github.io/PM_VEE/
ggplot(fifa19small, aes(Age, SqrtValue)) +
  geom_point() + geom_smooth(se = FALSE) +
  theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggplot(fifa19small, aes(Age)) +
  geom_histogram() +
  theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(fifa19small, aes(Reactions, SqrtValue)) +
  geom_point() + geom_smooth(se = FALSE) +
  theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggplot(fifa19small, aes(Reactions)) +
  geom_histogram() +
  theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(fifa19small, aes(BallControl, SqrtValue)) +
  geom_point() + geom_smooth(se = FALSE) +
  theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggplot(fifa19small, aes(BallControl)) +
  geom_histogram() +
  theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(fifa19small, aes(ShortPassing, SqrtValue)) +
  geom_point() + geom_smooth(se = FALSE) +
  theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggplot(fifa19small, aes(ShortPassing)) +
  geom_histogram() +
  theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(fifa19small, aes(Dribbling, SqrtValue)) +
  geom_point() + geom_smooth(se = FALSE) +
  theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggplot(fifa19small, aes(Dribbling)) +
  geom_histogram() +
  theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Create models

Let’s create following models:

library("gbm")
## Loaded gbm 2.1.5
fifa_gbm_deep <- gbm(SqrtValue~., data = fifa19small, n.trees = 250, interaction.depth = 4)
## Distribution not specified, assuming gaussian ...
fifa_gbm_shallow <- gbm(SqrtValue~., data = fifa19small, n.trees = 250, interaction.depth = 1)
## Distribution not specified, assuming gaussian ...
library("ranger")
fifa_rf <- ranger(SqrtValue~., data = fifa19small, num.trees = 250)

library("rms")
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
fifa_ols <- ols(SqrtValue ~ rcs(Age) + rcs(Special) + rcs(International.Reputation) + rcs(Skill.Moves) + rcs(Crossing) + rcs(Finishing) + rcs(HeadingAccuracy) + rcs(ShortPassing) + rcs(Volleys) + rcs(Dribbling) + rcs(Curve) + rcs(FKAccuracy) + rcs(LongPassing) + rcs(BallControl) + rcs(Acceleration) + rcs(SprintSpeed) + rcs(Agility) + rcs(Reactions) + rcs(Balance) + rcs(ShotPower) + rcs(Jumping) + rcs(Stamina) + rcs(Strength) + rcs(LongShots) + rcs(Aggression) + rcs(Interceptions) + rcs(Positioning) + rcs(Vision) + rcs(Penalties) + rcs(Composure) + rcs(Marking) + rcs(StandingTackle) + rcs(SlidingTackle) + rcs(GKDiving) + rcs(GKHandling) + rcs(GKKicking) + rcs(GKPositioning) + rcs(GKReflexes), data = fifa19small)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied
## = fractied): 5 knots requested with 5 unique values of x. knots set to 3
## interior values.

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied
## = fractied): 5 knots requested with 5 unique values of x. knots set to 3
## interior values.

Create explainers

library("DALEX")
fifa_gbm_exp_deep <- explain(fifa_gbm_deep, 
                        data = fifa19small, 
                        y = fifa19small$SqrtValue^2, 
                        predict_function = function(m,x) 
                          predict(m, x, n.trees = 250)^2,
                        label = "GBM deep")
## Preparation of a new explainer is initiated
##   -> model label       :  GBM deep 
##   -> data              :  16924  rows  41  cols 
##   -> target variable   :  16924  values 
##   -> predict function  :  function(m, x) predict(m, x, n.trees = 250)^2 
##   -> predicted values  :  numerical, min =  2.034728 , mean =  2484612 , max =  108062726  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -12833791 , mean =  49865.43 , max =  16361034  
##   -> model_info        :  package gbm , ver. 2.1.5 , task regression (  default  ) 
##   A new explainer has been created! 
fifa_gbm_exp_shallow <- explain(fifa_gbm_shallow, 
                        data = fifa19small, 
                        y = fifa19small$SqrtValue^2, 
                        predict_function = function(m,x) 
                          predict(m, x, n.trees = 250)^2,
                        label = "GBM shallow")
## Preparation of a new explainer is initiated
##   -> model label       :  GBM shallow 
##   -> data              :  16924  rows  41  cols 
##   -> target variable   :  16924  values 
##   -> predict function  :  function(m, x) predict(m, x, n.trees = 250)^2 
##   -> predicted values  :  numerical, min =  0.6418461 , mean =  2392380 , max =  88928951  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -38611620 , mean =  142097.5 , max =  33732602  
##   -> model_info        :  package gbm , ver. 2.1.5 , task regression (  default  ) 
##   A new explainer has been created! 
fifa_rf_exp <- explain(fifa_rf, 
                        data = fifa19small, 
                        y = fifa19small$SqrtValue^2, 
                        predict_function = function(m,x) 
                          predict(m, x)$predictions^2,
                        label = "RF")
## Preparation of a new explainer is initiated
##   -> model label       :  RF 
##   -> data              :  16924  rows  41  cols 
##   -> target variable   :  16924  values 
##   -> predict function  :  function(m, x) predict(m, x)$predictions^2 
##   -> predicted values  :  numerical, min =  5784.248 , mean =  2433673 , max =  95793175  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -6039446 , mean =  100804.5 , max =  23714519  
##   -> model_info        :  package ranger , ver. 0.11.2 , task regression (  default  ) 
##   A new explainer has been created! 
fifa_rms_exp <- explain(fifa_ols, 
                        data = fifa19small, 
                        y = fifa19small$SqrtValue^2, 
                        predict_function = function(m,x) 
                          predict(m, x)^2,
                        label = "RMS")
## Preparation of a new explainer is initiated
##   -> model label       :  RMS 
##   -> data              :  16924  rows  41  cols 
##   -> target variable   :  16924  values 
##   -> predict function  :  function(m, x) predict(m, x)^2 
##   -> predicted values  :  numerical, min =  0.009600715 , mean =  2452489 , max =  103082633  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -25691692 , mean =  81988.25 , max =  43440034  
##   -> model_info        :  package stats , ver. 3.6.1 , task regression (  default  ) 
##   A new explainer has been created! 

Model performance

library("auditor")
## 
## Attaching package: 'auditor'
## The following object is masked from 'package:DALEX':
## 
##     model_performance
fifa_mr_gbm_shallow <- model_residual(fifa_gbm_exp_shallow)
fifa_mr_gbm_deep <- model_residual(fifa_gbm_exp_deep)
fifa_mr_gbm_rf <- model_residual(fifa_rf_exp)
fifa_mr_gbm_rms <- model_residual(fifa_rms_exp)

plot_residual_boxplot(fifa_mr_gbm_shallow, fifa_mr_gbm_deep, fifa_mr_gbm_rf, fifa_mr_gbm_rms) +
  scale_y_sqrt()

plot_prediction(fifa_mr_gbm_shallow, abline = TRUE) +
  scale_y_sqrt() +  scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

plot_prediction(fifa_mr_gbm_deep, abline = TRUE)  +
  scale_y_sqrt() +  scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

plot_prediction(fifa_mr_gbm_rf, abline = TRUE)  +
  scale_y_sqrt() +  scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

plot_prediction(fifa_mr_gbm_rms, abline = TRUE) +
  scale_y_sqrt() +  scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

Feature importance

library("ingredients")
## 
## Attaching package: 'ingredients'
## The following object is masked from 'package:auditor':
## 
##     plotD3
## The following object is masked from 'package:Hmisc':
## 
##     describe
fifa_feat <- ingredients::feature_importance(fifa_gbm_exp_shallow)
plot(fifa_feat, max_vars = 12)
## Warning: Please note that 'theme_drwhy_colors()' is now deprecated, it is
## better to use 'colors_discrete_drwhy()' instead.

fifa_feat <- ingredients::feature_importance(fifa_gbm_exp_deep)
plot(fifa_feat, max_vars = 12)

fifa_feat <- ingredients::feature_importance(fifa_rf_exp)
plot(fifa_feat, max_vars = 12)

fifa_feat <- ingredients::feature_importance(fifa_rms_exp)
plot(fifa_feat, max_vars = 12)

Partial Dependency Profiles

fifa19_pd_shallow <- ingredients::partial_dependency(fifa_gbm_exp_shallow, variables = c("Age", "Reactions","BallControl", "Dribbling"))

fifa19_pd_deep <- ingredients::partial_dependency(fifa_gbm_exp_deep, variables = c("Age", "Reactions","BallControl", "Dribbling"))

fifa19_pd_rf <- ingredients::partial_dependency(fifa_rf_exp, variables = c("Age", "Reactions","BallControl", "Dribbling"))

fifa19_pd_rms <- ingredients::partial_dependency(fifa_rms_exp, variables = c("Age", "Reactions","BallControl", "Dribbling"))

plot(fifa19_pd_shallow, fifa19_pd_deep, fifa19_pd_rf, fifa19_pd_rms) +
  scale_y_log10()

Break Down

library("iBreakDown")
## 
## Attaching package: 'iBreakDown'
## The following objects are masked from 'package:ingredients':
## 
##     describe, plotD3
## The following object is masked from 'package:auditor':
## 
##     plotD3
## The following object is masked from 'package:Hmisc':
## 
##     describe
fifa_pg <- break_down(fifa_gbm_exp_shallow, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)

fifa_pg <- break_down(fifa_gbm_exp_deep, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)

fifa_pg <- break_down(fifa_rf_exp, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)

fifa_pg <- break_down(fifa_rms_exp, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)

Break Down with interactions

library("iBreakDown")

fifa_pg <- break_down(fifa_gbm_exp_shallow, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)

fifa_pg <- break_down(fifa_gbm_exp_deep, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)

fifa_pg <- break_down(fifa_rf_exp, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)

fifa_pg <- break_down(fifa_rms_exp, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)

Ceteris Paribus

fifa_cp_shallow <- ceteris_paribus(fifa_gbm_exp_shallow,
                           new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
                           variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
                           )

fifa_cp_deep <- ceteris_paribus(fifa_gbm_exp_deep,
                           new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
                           variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
                           )

fifa_cp_rf <- ceteris_paribus(fifa_rf_exp,
                           new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
                           variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
                           )

fifa_cp_rms <- ceteris_paribus(fifa_rms_exp,
                           new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
                           variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
                           )

plot(fifa_cp_shallow, fifa_cp_deep, fifa_cp_rf, fifa_cp_rms, color = "_label_") + 
  show_observations(fifa_cp_rf, fifa_cp_shallow, fifa_cp_deep,fifa_cp_rms, variables = c("Age", "Reactions","BallControl", "Dribbling")) + 
  scale_y_log10()


library(modelStudio)
fifa19_ms <- modelStudio(fifa_gbm_shallow, new_observation = fifa19small[c("Cristiano Ronaldo","R. Lewandowski"), ], B = 5, digits = 0)

op <- modelStudioOptions(
  margin.left = 6
)

print(fifa19_ms, options = op)

r2d3::save_d3_html(fifa19_ms, file = "fifa19.html")